#coding:utf8

from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StringType, IntegerType, StructField
import pyspark.sql.functions as F

# Word count implemented in DataFrame DSL style
if __name__ == '__main__':
    # Build the SparkSession entry point for this application.
    spark = SparkSession.builder.\
        appName("test_parque").\
        master("local[*]").\
        getOrCreate()

    # Text source: each line of the file becomes one row in a single
    # string column named "value".
    df = spark.read.format("text").load('../data/input/words.txt')

    # F.split turns the line string into an ARRAY of words (one array
    # column, not multiple columns); F.explode then expands that array
    # into one row per word. withColumn with an existing column name
    # ("value") replaces the original column.
    df2 = df.withColumn("value", F.explode(F.split(df['value'], " ")))

    # Group identical words and print the per-word counts.
    df2.groupBy("value").\
        count().\
        show()

    # Release cluster resources (was missing: the SparkSession leaked).
    spark.stop()
